{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip install onnx"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from gliner import GLiNER"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = GLiNER.from_pretrained(\"urchade/gliner_medium\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# save\n",
    "\n",
    "model.save_pretrained(\"gliner_medium\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gliner_model = GLiNER.from_pretrained(\"gliner_medium\", load_tokenizer=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "onnx_save_path = os.path.join(\"gliner_medium\", \"model.onnx\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "text = \"ONNX is an open-source format designed to enable the interoperability of AI models across various frameworks and tools.\"\n",
    "labels = ['format', 'model', 'tool', 'cat']\n",
    "\n",
    "inputs, _ = gliner_model.prepare_model_inputs([text], labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if gliner_model.config.span_mode == 'token_level':\n",
    "    all_inputs =  (inputs['input_ids'], inputs['attention_mask'], \n",
    "                    inputs['words_mask'], inputs['text_lengths'])\n",
    "    input_names = ['input_ids', 'attention_mask', 'words_mask', 'text_lengths']\n",
    "    dynamic_axes={\n",
    "        \"input_ids\": {0: \"batch_size\", 1: \"sequence_length\"},\n",
    "        \"attention_mask\": {0: \"batch_size\", 1: \"sequence_length\"},\n",
    "        \"words_mask\": {0: \"batch_size\", 1: \"sequence_length\"},\n",
    "        \"text_lengths\": {0: \"batch_size\", 1: \"value\"},\n",
    "        \"logits\": {0: \"position\", 1: \"batch_size\", 2: \"sequence_length\", 3: \"num_classes\"},\n",
    "    }\n",
    "else:\n",
    "    all_inputs =  (inputs['input_ids'], inputs['attention_mask'], \n",
    "                    inputs['words_mask'], inputs['text_lengths'],\n",
    "                    inputs['span_idx'], inputs['span_mask'])\n",
    "    input_names = ['input_ids', 'attention_mask', 'words_mask', 'text_lengths', 'span_idx', 'span_mask']\n",
    "    dynamic_axes={\n",
    "        \"input_ids\": {0: \"batch_size\", 1: \"sequence_length\"},\n",
    "        \"attention_mask\": {0: \"batch_size\", 1: \"sequence_length\"},\n",
    "        \"words_mask\": {0: \"batch_size\", 1: \"sequence_length\"},\n",
    "        \"text_lengths\": {0: \"batch_size\", 1: \"value\"},\n",
    "        \"span_idx\": {0: \"batch_size\", 1: \"num_spans\", 2: \"idx\"},\n",
    "        \"span_mask\": {0: \"batch_size\", 1: \"num_spans\"},\n",
    "        \"logits\": {0: \"batch_size\", 1: \"sequence_length\", 2: \"num_spans\", 3: \"num_classes\"},\n",
    "    }\n",
    "print('Converting the model...')\n",
    "torch.onnx.export(\n",
    "    gliner_model.model,\n",
    "    all_inputs,\n",
    "    f=onnx_save_path,\n",
    "    input_names=input_names,\n",
    "    output_names=[\"logits\"],\n",
    "    dynamic_axes=dynamic_axes,\n",
    "    opset_version=14,\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#quantize model\n",
    "from onnxruntime.quantization import quantize_dynamic, QuantType\n",
    "\n",
    "quantized_save_path = os.path.join(\"gliner_medium\", \"model_quantized.onnx\")\n",
    "# Quantize the ONNX model\n",
    "print(\"Quantizing the model...\")\n",
    "quantize_dynamic(\n",
    "    onnx_save_path,  # Input model\n",
    "    quantized_save_path,  # Output model\n",
    "    weight_type=QuantType.QUInt8  # Quantize weights to 8-bit integers\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load onnx model\n",
    "model = GLiNER.from_pretrained(\"gliner_medium\", load_onnx_model=True, load_tokenizer=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "text = \"\"\"\n",
    "Libretto by Marius Petipa, based on the 1822 novella ``Trilby, ou Le Lutin d'Argail`` by Charles Nodier, first presented by the Ballet of the Moscow Imperial Bolshoi Theatre on January 25/February 6 (Julian/Gregorian calendar dates), 1870, in Moscow with Polina Karpakova as Trilby and Ludiia Geiten as Miranda and restaged by Petipa for the Imperial Ballet at the Imperial Bolshoi Kamenny Theatre on January 17–29, 1871 in St. Petersburg with Adèle Grantzow as Trilby and Lev Ivanov as Count Leopold.\n",
    "\"\"\"\n",
    "\n",
    "labels = [\"person\", \"book\", \"location\", \"date\", \"actor\", \"character\"]\n",
    "\n",
    "entities = model.predict_entities(text, labels, threshold=0.4)\n",
    "\n",
    "for entity in entities:\n",
    "    print(entity[\"text\"], \"=>\", entity[\"label\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load quantized model\n",
    "model = GLiNER.from_pretrained(\"gliner_medium\", load_onnx_model=True, load_tokenizer=True, onnx_model_file=\"model_quantized.onnx\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "text = \"\"\"\n",
    "Libretto by Marius Petipa, based on the 1822 novella ``Trilby, ou Le Lutin d'Argail`` by Charles Nodier, first presented by the Ballet of the Moscow Imperial Bolshoi Theatre on January 25/February 6 (Julian/Gregorian calendar dates), 1870, in Moscow with Polina Karpakova as Trilby and Ludiia Geiten as Miranda and restaged by Petipa for the Imperial Ballet at the Imperial Bolshoi Kamenny Theatre on January 17–29, 1871 in St. Petersburg with Adèle Grantzow as Trilby and Lev Ivanov as Count Leopold.\n",
    "\"\"\"\n",
    "\n",
    "labels = [\"person\", \"book\", \"location\", \"date\", \"actor\", \"character\"]\n",
    "\n",
    "entities = model.predict_entities(text, labels, threshold=0.4)\n",
    "\n",
    "for entity in entities:\n",
    "    print(entity[\"text\"], \"=>\", entity[\"label\"])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
